In [5]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
import plotly.express as px 
import seaborn as sns 
import plotly.graph_objects as go
import plotly.io as pio
In [6]:
c = pd.read_csv('covid_19_clean_complete.csv')
c.head()
Out[6]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered Active WHO Region
0 NaN Afghanistan 33.93911 67.709953 2020-01-22 0 0 0 0 Eastern Mediterranean
1 NaN Albania 41.15330 20.168300 2020-01-22 0 0 0 0 Europe
2 NaN Algeria 28.03390 1.659600 2020-01-22 0 0 0 0 Africa
3 NaN Andorra 42.50630 1.521800 2020-01-22 0 0 0 0 Europe
4 NaN Angola -11.20270 17.873900 2020-01-22 0 0 0 0 Africa
In [7]:
c.describe()
Out[7]:
Lat Long Confirmed Deaths Recovered Active
count 49068.000000 49068.000000 4.906800e+04 49068.000000 4.906800e+04 4.906800e+04
mean 21.433730 23.528236 1.688490e+04 884.179160 7.915713e+03 8.085012e+03
std 24.950320 70.442740 1.273002e+05 6313.584411 5.480092e+04 7.625890e+04
min -51.796300 -135.000000 0.000000e+00 0.000000 0.000000e+00 -1.400000e+01
25% 7.873054 -15.310100 4.000000e+00 0.000000 0.000000e+00 0.000000e+00
50% 23.634500 21.745300 1.680000e+02 2.000000 2.900000e+01 2.600000e+01
75% 41.204380 80.771797 1.518250e+03 30.000000 6.660000e+02 6.060000e+02
max 71.706900 178.065000 4.290259e+06 148011.000000 1.846641e+06 2.816444e+06
In [8]:
c.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 49068 entries, 0 to 49067
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Province/State  14664 non-null  object 
 1   Country/Region  49068 non-null  object 
 2   Lat             49068 non-null  float64
 3   Long            49068 non-null  float64
 4   Date            49068 non-null  object 
 5   Confirmed       49068 non-null  int64  
 6   Deaths          49068 non-null  int64  
 7   Recovered       49068 non-null  int64  
 8   Active          49068 non-null  int64  
 9   WHO Region      49068 non-null  object 
dtypes: float64(2), int64(4), object(4)
memory usage: 3.7+ MB
In [9]:
c.shape
Out[9]:
(49068, 10)
In [10]:
c.isna().sum()
Out[10]:
Province/State    34404
Country/Region        0
Lat                   0
Long                  0
Date                  0
Confirmed             0
Deaths                0
Recovered             0
Active                0
WHO Region            0
dtype: int64
In [11]:
c.drop('Province/State', axis = 1, inplace = True)
In [12]:
c['Date'] = pd.to_datetime(c['Date'])
In [13]:
who =c.groupby('WHO Region')[['WHO Region','Deaths']].sum().sort_values(by=['Deaths'],ascending=False).head(10)
who.reset_index(inplace = True)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\1601602620.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  who =c.groupby('WHO Region')[['WHO Region','Deaths']].sum().sort_values(by=['Deaths'],ascending=False).head(10)
In [14]:
who.columns
Out[14]:
Index(['WHO Region', 'Deaths'], dtype='object')
In [15]:
fig= px.bar(who,x='Deaths', y='WHO Region',color = 'WHO Region', title = 'Records of Deaths in the WHO Region' )
fig.show()
In [16]:
who_is_who = c['WHO Region'].unique()
who_is_who
Out[16]:
array(['Eastern Mediterranean', 'Europe', 'Africa', 'Americas',
       'Western Pacific', 'South-East Asia'], dtype=object)
In [17]:
recovered = c.groupby('WHO Region')[['WHO Region','Recovered']].sum().sort_values(by=['Recovered'],ascending=False)
recovered.reset_index(inplace = True)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\756317827.py:1: FutureWarning:

The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.

In [18]:
fig = px.pie(recovered, values ='Recovered',names= 'WHO Region', title = 'Total Recovered in the WHO Region', hole = 0.4 )
fig.show()
In [19]:
group_who = c.groupby('WHO Region')[['WHO Region', 'Active','Recovered','Deaths']].sum()
group_who.reset_index(inplace= True)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\1700114451.py:1: FutureWarning:

The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.

In [20]:
anchos = [0.2] * 6
fig = go.Figure()
fig.add_trace(go.Bar(x = group_who['WHO Region'],
                     y = group_who['Recovered'], 
                     width = anchos, name = 'Recovered Cases'))
fig.add_trace(go.Bar(x = group_who['WHO Region'],
                     y = group_who['Deaths'], 
                     width = anchos, name = 'Death Cases'))
fig.add_trace(go.Bar(x = group_who['WHO Region'],
                     y = group_who['Active'], 
                     width = anchos, name = 'Active Cases'))


fig.update_layout(title =  "WHO Regions Stats",
                  barmode = 'group', title_font_size = 40)
fig.update_xaxes(title_text = 'WHO Regions')
fig.update_yaxes(title_text = "Number of cases")

fig.show()
In [21]:
fig = go.Figure()
fig.add_trace(go.Bar(x = group_who['WHO Region'],
                     y = group_who['Recovered'], 
                     name = 'Recovered Cases'))
fig.add_trace(go.Bar(x = group_who['WHO Region'],
                     y = group_who['Active'],
                     name = 'Active Cases'))
fig.add_trace(go.Bar(x = group_who['WHO Region'],
                     y = group_who['Deaths'], 
                     name = 'Death Cases'))


fig.update_layout(title =  "WHO Regions Stats",
                  barmode = 'stack', title_font_size = 40)
fig.update_xaxes(title_text = 'WHO Regions')
fig.update_yaxes(title_text = "Number of cases")
In [22]:
rs = np.random.RandomState(0)
df = pd.DataFrame(rs.rand(10, 10))
corr = c.corr()
corr.style.background_gradient(cmap='coolwarm').set_precision(2)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\308013264.py:3: FutureWarning:

The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.

C:\Users\karth\AppData\Local\Temp\ipykernel_22732\308013264.py:4: FutureWarning:

this method is deprecated in favour of `Styler.format(precision=..)`

Out[22]:
  Lat Long Confirmed Deaths Recovered Active
Lat 1.00 -0.13 0.04 0.07 0.02 0.04
Long -0.13 1.00 -0.08 -0.10 -0.05 -0.09
Confirmed 0.04 -0.08 1.00 0.91 0.90 0.95
Deaths 0.07 -0.10 0.91 1.00 0.76 0.89
Recovered 0.02 -0.05 0.90 0.76 1.00 0.71
Active 0.04 -0.09 0.95 0.89 0.71 1.00
In [23]:
alg = c[c['Country/Region']== 'Algeria'][['Date','Recovered','Deaths','Active']]
alg.set_index('Date',inplace = True)
alg
Out[23]:
Recovered Deaths Active
Date
2020-01-22 0 0 0
2020-01-23 0 0 0
2020-01-24 0 0 0
2020-01-25 0 0 0
2020-01-26 0 0 0
... ... ... ...
2020-07-23 17369 1124 6991
2020-07-24 17369 1136 7654
2020-07-25 18076 1146 7542
2020-07-26 18088 1155 8114
2020-07-27 18837 1163 7973

188 rows × 3 columns

In [24]:
alg.resample('M')
Out[24]:
<pandas.core.resample.DatetimeIndexResampler object at 0x000002B08E8EE250>
In [25]:
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data = alg)
Out[25]:
<Axes: xlabel='Date'>
In [26]:
latest = c[c['Date'] == c['Date'].max()]
latest_map = latest.groupby('Country/Region')['Active','Confirmed','Deaths','Recovered'].sum().reset_index()
latest_map.head()
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\4070237916.py:2: FutureWarning:

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

Out[26]:
Country/Region Active Confirmed Deaths Recovered
0 Afghanistan 9796 36263 1269 25198
1 Albania 1991 4880 144 2745
2 Algeria 7973 27973 1163 18837
3 Andorra 52 907 52 803
4 Angola 667 950 41 242
In [27]:
fig = px.choropleth(latest_map, locations ='Country/Region', 
                    locationmode = 'country names',color = 'Deaths',
                    range_color = [1,10000]
                   )
fig.update_layout(title ="Death Records Across The World")
fig.show()
In [28]:
fig = px.choropleth(c,locations='Country/Region',locationmode='country names',color='Confirmed')
fig.update_layout(title='Choropleth Map of Confirmed Cases -till today',template="plotly_dark")
fig.show()
In [29]:
world_cases = c.groupby('Date')['Active',
                                      'Confirmed','Deaths'].sum().reset_index()
world_cases
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\944271916.py:1: FutureWarning:

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

Out[29]:
Date Active Confirmed Deaths
0 2020-01-22 510 555 17
1 2020-01-23 606 654 18
2 2020-01-24 879 941 26
3 2020-01-25 1353 1434 42
4 2020-01-26 2010 2118 56
... ... ... ... ...
183 2020-07-23 6166006 15510481 633506
184 2020-07-24 6212290 15791645 639650
185 2020-07-25 6243930 16047190 644517
186 2020-07-26 6309711 16251796 648621
187 2020-07-27 6358362 16480485 654036

188 rows × 4 columns

In [30]:
sns.lineplot(x = 'Date', y ='Active', data = world_cases)
Out[30]:
<Axes: xlabel='Date', ylabel='Active'>
In [32]:
for i in range(len(c)):
    if(c['Deaths'][i] > 0):
        print(c['Deaths'][i], c['Country/Region'][i], c['Date'][i])
        break
17 China 2020-01-22 00:00:00
In [33]:
c['date']=pd.to_datetime(c['Date']).dt.to_period('M')
c.head(3)
Out[33]:
Country/Region Lat Long Date Confirmed Deaths Recovered Active WHO Region date
0 Afghanistan 33.93911 67.709953 2020-01-22 0 0 0 0 Eastern Mediterranean 2020-01
1 Albania 41.15330 20.168300 2020-01-22 0 0 0 0 Europe 2020-01
2 Algeria 28.03390 1.659600 2020-01-22 0 0 0 0 Africa 2020-01
In [34]:
px.scatter(data_frame = c, x='Active',y='Deaths',color='WHO Region',title='The relation between the Active and death cases',
          log_x= True,
          range_y=[25,95],
           animation_frame= 'date'
           
          )
    
In [35]:
pio.templates.default = "plotly_dark"
px.scatter_3d(c,x="Confirmed",y="Deaths",z="Recovered")
In [36]:
px.line(c,x="Date",y="Recovered",title="Wolrd Wide Recovered")
In [ ]: